home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
MacWorld 1999 July
/
Macworld (1999-07).dmg
/
Shareware World
/
Info
/
For Developers
/
Mops 3.4.sea
/
Mops source
/
System source
/
StrUtilities
< prev
next >
Wrap
Text File
|
1994-06-28
|
7KB
|
330 lines
\ Utility subroutines for the String+ class.
\ Separated from String+ and revised - Aug 87.
\ Error checking improved - May 88.
\ Version for Mops - June 89.
0 value CASE? \ True if case to be significant in comparisons
$ D constant RET \ Carriage return
0 value $START \ Addr of start of (chars of) current string.
\ ======== TRTBL class ========
\ Translate tables allow very fast searching of strings for specified sets
\ of characters. In effect we are separating the specification of what we
\ are searching for from the actual search operation itself. This allows an
\ uncluttered and extremely fast search operation (the SCAN: and <SCAN: methods
\ of class STRING+), and it also allows a very flexible (and easily extensible)
\ choice of what to search for. The setup time for translate tables can
\ generally be factored out of inner loops, or done at compile time, and is
\ quite fast, anyway.
\ We first define a class (trtbl) which is needed to define the table mapping
\ lower case letters to upper case. This table is then used by some of
\ the methods in the trtbl class proper.
:class (TRTBL) super{ object }
record
{ int COUNT
256 bytes THETBL
}
:m TBL: addr: theTbl ;m
:m >UC:
addr: theTbl & A +
addr: theTbl & a +
26 cmove ;m
:mcode TRANSC: \ ( c -- c' ) Translates 1 char using the table.
MOVE (SP),D0
MOVE.B 2(A2,D0.W),3(SP)
;mcode
;class
(trtbl) UCTBL \ Maps lower case letters to upper case, and
\ leaves everything else unchanged.
: XX
0 tbl: UCtbl 256 bounds
DO dup i c! 1+ LOOP
drop >uc: UCtbl ;
xx forget xx
:code (SELC) \ Subroutine used by SELCHAR: and SELCHARNC:.
ADDQ.W #1,(A2)
MOVE.W (A2)+,D1
MOVE.B D1,0(A2,D2.W)
;code
:class TRTBL super{ (trtbl) }
:mcode CLEAR:
loc
CLR.W (A2)+
MOVEQ #63,D0
loop CLR (A2)+
DBRA D0,loop
;mcode
:m PUT: { addr len -- }
addr addr: theTbl len 256 min cmove ;m
:mcode SELCHARS: \ ( addr len -- )
loc
POP D0 ; D0 = len
POP A1 ; A1 = addr
ADD D0,A1
MOVE D0,D1
ADD.W (A2),D1
MOVE.W D1,(A2)+
MOVEQ #0,D2
BRA.S lptst
loop MOVE.B -(A1),D2
MOVE.B D1,0(A2,D2.W)
SUBQ #1,D1
lptst DBRA D0,loop
;mcode
:mcode SELCHAR: \ ( c -- )
POP D2
BSR dic[(selc)]
;mcode
:mcode SELCHARNC: \ ( c -- ) "SelChar, no case".
\ Selects a character, and if it is a letter,
\ enters the same value in the LC and UC positions of the
\ table, so that case will in effect be ignored when the
\ table is used.
POP D2
LEA 10(dic[UCtbl]),A0 ; Offset is offs to ^obj, plus 2
MOVE.B 0(A0,D2.W),D2 ; Convert char to upper case
BSR dic[(selc)]
CMPI.B #$41,D2
BLT.S end
CMPI.B #$5A,D2
BGT.S end
ORI.B #$20,D2
MOVE.B D1,0(A2,D2.W)
end
;mcode
:mcode SELRANGE: \ ( lo hi -- )
loc
ADDQ #2,A2
POP D0 ; hi
POP D1 ; lo
ADD D1,A2
SUB D1,D0
BLT.S end
MOVEQ #1,D2
loop MOVE.B D2,(A2)+
lptst DBRA D0,loop
end
;mcode
:mcode INVERT:
loc
ADDQ #2,A2
MOVEQ #255,D0
loop TST.B (A2)
SEQ (A2)+
DBRA D0,loop
;mcode
;class
\ GETIT is a code subroutine to get the address and length of the active part
\ of the current string. A2 points to the string object.
\
\ Returns:
\ A0 addr of first char of the active part
\ D0 length of active part
\ D2 (lo half) high 16 bits of length - may be used as an outer loop
\ counter in DBxx loops.
\ CC result of subtracting POS from LIM to get the length.
\ $start addr of the start of the whole string
\
\ If this length turns out to be negative, $CHK is called to give an error trap.
\ We don't take a length of zero as an error (there are some situations where
\ this is quite legitimate). Those operations which don't like a zero
\ length can call $CHK themselves.
\ This subroutine must be called from a method, with A2 undisturbed.
\ Only A0, A2, D0 and D2 are altered.
:code GETIT
loc
MOVE (A2),A0 ; A0 = handle
MOVE (A0),A0 ; Dereference it - addr of start of string
MOVE A0,dic[$start] ; Leave in $start
ADD 8(A2),A0 ; Add POS, giving addr of start of active part
MOVE 12(A2),D0 ; D0 = LIM
SUB 8(A2),D0 ; Subtract POS, giving length
MOVE D0,D2
SWAP D2 ; Hi 16 bits to lo half of D2
TST D0 ; Test length
BGE.S end
JMP dic[$fail] ; If negative, error
end
;code
\ CCMP is the primitive subroutine for performing string comparison.
\ A0 -> string2
\ A1 -> string1
\ D0 = length
\ Assumes length is less than 64K.
\ Returns with the CC set appropriately.
\ Uses those registers.
:code CCMP
loc
SUBQ #1,D0
BMI.S equal
TST dic[case?]
BEQ.S nocase
loop1 CMPM.B (A0)+,(A1)+
DBNE D0,loop1
RTS
equal CMP.W D0,D0
RTS
nocase MOVEM D2/D3/A2,-(SP)
MOVEQ #0,D2
LEA 10(dic[UCtbl]),A2
loop2 CMPM.B (A0)+,(A1)+
lp2tst DBNE D0,loop2
BEQ.S end
MOVE.B -1(A1),D2
MOVE.B 0(A2,D2.W),D3
MOVE.B -1(A0),D2
CMP.B 0(A2,D2.W),D3
BEQ.S lp2tst
end MOVEM (SP)+,D2/D3/A2
;code
\ CSCH and <CSCH are the primitive subroutines for searching for a single
\ character.
\ A0 -> string
\ D0 = length
\ D2 = length (hi)
\ D1 = char (rest must be zero)
\ Both routines return with the CC set appropriately.
:code CSCH
loc
TST dic[case?]
BEQ.S nocase
BRA.S lp1tst ; Note: we enter the loop with "not equal"
loop1 CMP.B (A0)+,D1
lp1tst DBEQ D0,loop1
DBEQ D2,loop1
RTS
nocase MOVEM D1/D2/A2,-(SP)
LEA 10(dic[UCtbl]),A2
MOVE.B 0(A2,D1.W),D1
MOVEQ #1,D2 ; Set "not equal", clear top 3 bytes of D2
BRA.S lp2tst
outer MOVE D2,4(SP)
loop2 MOVE.B (A0)+,D2
CMP.B 0(A2,D2.W),D1
lp2tst DBEQ D0,loop2
MOVEM 4(SP),D2 ; Recover outer loop counter, preserving CC
DBEQ D2,outer
MOVEM (SP)+,D1/D2/A2
end
;code
:code <CSCH
loc
TST dic[case?]
BEQ.S nocase
BRA.S lp1tst ; Note: we enter the loop with "not equal"
loop1 CMP.B -(A0),D1
lp1tst DBEQ D0,loop1
DBEQ D2,loop1
BRA.S end
nocase MOVEM D1/D2/A2,-(SP)
LEA 10(dic[UCtbl]),A2
MOVE.B 0(A2,D1.W),D1
MOVEQ #1,D2 ; Set "not equal", clear top 3 bytes of D2
BRA.S lp2tst
outer MOVE D2,4(SP)
loop2 MOVE.B -(A0),D2
CMP.B 0(A2,D2.W),D1
lp2tst DBEQ D0,loop2
MOVEM 4(SP),D2 ; Recover outer loop counter, preserving CC
DBEQ D2,outer
MOVEM (SP)+,D1/D2/A2
end
;code
\ CMPSTR ( addr1 len1 addr2 len2 -- n ) compares 2 strings.
\ Case is significant if CASE? is set to true.
\ Returns:
\ -1 first string low
\ 0 strings are equal
\ 1 first string high
\ We assume the lengths are both less than 64K.
\
\ Uses D0,D1,D2,A0,A1.
:code CMPSTR
loc
POP D0 ; D0 = len2
POP A0 ; A0 = addr2
POP D1 ; D1 = len1
MOVE (SP),A1 ; A1 = addr1
MOVEQ #0,D2 ; D2 will hold return result
CMP.W D1,D0 ; Compare lengths
BEQ.S docmp
BHI.S op2long
MOVEQ #1,D2
BRA.S docmp
op2long MOVE.W D1,D0
MOVEQ #-1,D2
docmp BSR dic[ccmp]
BEQ.S end
SMI D2
ORI.B #1,D2
EXT.W D2
EXT.L D2
end MOVE D2,(SP)
;code
\ INSTEAD ( c-old c-new -- ) may be used just after a SCON is defined.
\ Within the SCON, it replaces any occurrences of c-old with c-new. This
\ operation is useful for creating SCONs containing special characters
\ such as tab.
: INSTEAD { c-old c-new -- }
latest name> ex-gen bounds \ SCONs use DOES> so require EX-GEN
DO i c@ c-old = IF c-new i c! THEN
LOOP ;